1 package org.apache.lucene.sandbox.queries;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.BufferedReader;
21 import java.io.InputStream;
22 import java.io.InputStreamReader;
23 import java.nio.charset.StandardCharsets;
24
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.analysis.MockTokenizer;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field;
30 import org.apache.lucene.index.IndexReader;
31 import org.apache.lucene.index.RandomIndexWriter;
32 import org.apache.lucene.index.Term;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.MultiTermQuery;
35 import org.apache.lucene.search.TopDocs;
36 import org.apache.lucene.search.similarities.DefaultSimilarity;
37 import org.apache.lucene.store.Directory;
38 import org.apache.lucene.util.IOUtils;
39 import org.apache.lucene.util.LuceneTestCase;
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 public class TestSlowFuzzyQuery2 extends LuceneTestCase {
64
65 static final float epsilon = 0.00001f;
66
67 static int[][] mappings = new int[][] {
68 new int[] { 0x40, 0x41 },
69 new int[] { 0x40, 0x0195 },
70 new int[] { 0x40, 0x0906 },
71 new int[] { 0x40, 0x1040F },
72 new int[] { 0x0194, 0x0195 },
73 new int[] { 0x0194, 0x0906 },
74 new int[] { 0x0194, 0x1040F },
75 new int[] { 0x0905, 0x0906 },
76 new int[] { 0x0905, 0x1040F },
77 new int[] { 0x1040E, 0x1040F }
78 };
79 public void testFromTestData() throws Exception {
80
81 assertFromTestData(mappings[random().nextInt(mappings.length)]);
82 }
83
84 public void assertFromTestData(int codePointTable[]) throws Exception {
85 if (VERBOSE) {
86 System.out.println("TEST: codePointTable=" + codePointTable);
87 }
88 InputStream stream = getClass().getResourceAsStream("fuzzyTestData.txt");
89 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
90
91 int bits = Integer.parseInt(reader.readLine());
92 int terms = (int) Math.pow(2, bits);
93
94 Directory dir = newDirectory();
95 Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
96 RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setMergePolicy(newLogMergePolicy()));
97
98 Document doc = new Document();
99 Field field = newTextField("field", "", Field.Store.NO);
100 doc.add(field);
101
102 for (int i = 0; i < terms; i++) {
103 field.setStringValue(mapInt(codePointTable, i));
104 writer.addDocument(doc);
105 }
106
107 IndexReader r = writer.getReader();
108 IndexSearcher searcher = newSearcher(r);
109 if (VERBOSE) {
110 System.out.println("TEST: searcher=" + searcher);
111 }
112
113
114 searcher.setSimilarity(new DefaultSimilarity());
115
116 writer.close();
117 String line;
118 while ((line = reader.readLine()) != null) {
119 String params[] = line.split(",");
120 String query = mapInt(codePointTable, Integer.parseInt(params[0]));
121 int prefix = Integer.parseInt(params[1]);
122 int pqSize = Integer.parseInt(params[2]);
123 float minScore = Float.parseFloat(params[3]);
124 SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
125 q.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
126 int expectedResults = Integer.parseInt(reader.readLine());
127 TopDocs docs = searcher.search(q, expectedResults);
128 assertEquals(expectedResults, docs.totalHits);
129 for (int i = 0; i < expectedResults; i++) {
130 String scoreDoc[] = reader.readLine().split(",");
131 assertEquals(Integer.parseInt(scoreDoc[0]), docs.scoreDocs[i].doc);
132 assertEquals(Float.parseFloat(scoreDoc[1]), docs.scoreDocs[i].score, epsilon);
133 }
134 }
135 IOUtils.close(r, dir, analyzer);
136 }
137
138
139 private static String mapInt(int codePointTable[], int i) {
140 StringBuilder sb = new StringBuilder();
141 String binary = Integer.toBinaryString(i);
142 for (int j = 0; j < binary.length(); j++)
143 sb.appendCodePoint(codePointTable[binary.charAt(j) - '0']);
144 return sb.toString();
145 }
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185 }